In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

file_path = './survey_list.xlsx'
df = pd.read_excel(file_path)

df.head()
Out[15]:
Index Terms & conditions 성별 생년월일 전화번호 선호하는 영화 장르 현재 정신 건강 관련 치료나 상담을 받고 있다. 내게 좋은 일이 일어난다면, 나는 보통 그 일로 인해 크게 영향을 받곤 한다. 내가 무언가를 잘해 냈을 때, 나는 그 상태를 계속 유지하고 싶어 한다. 나는 경기에서 이기면 보통 매우 흥분한다. ... 이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다. (1-차분하다, 9-흥분되었다).8 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..8 이 영화를 이전에 본 적이 있습니까?.9 이 영화를 보고난 후 현재 기분이 즐겁고 행복하다. (1-불쾌하다, 9-즐겁다).9 이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다. (1-차분하다, 9-흥분되었다).9 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..9 실험이 이해하기 쉬웠는지, 진행 방식이나 설명이 명확했는지, 개선할 점이 있다면 자유롭게 적어주세요. Submitter Submission Date Submission ID
0 4 ✓ 남자 1998. 1. 17. 1049468170 액션 | 코미디 아니오 다소 동의 매우 동의 매우 동의 ... 3 만족 (Contentment) 아니요 7 5 즐거움 (Pleasure) NaN NaN 2025. 3. 7. 오후 2:58:07 fb14df8ad543d1
1 5 ✓ 남자 2001. 12. 23 1051350406 코미디 | 드라마 아니오 다소 동의 다소 동의 매우 동의 ... 4 우울 (Depression) 아니요 7 6 행복 (Happiness) 이해하기 쉬웠어요 NaN 2025. 3. 7. 오후 3:02:49 2686e6d7d543d1
2 6 ✓ 남자 1224. 8. 1. 1039471109 액션 | 코미디 | 판타지 아니오 매우 동의 다소 동의 다소 동의 ... 4 슬픔 (Sadness) 아니요 5 3 평온 (Calmness) 한국어 번역이 조금더 있으면 좋겠습니다. NaN 2025. 3. 7. 오후 3:08:15 3b6e7dc0d543d1
3 7 ✓ 여자 2004. 1. 25. 1036149407 액션 아니오 다소 동의 다소 동의 다소 동의하지 않음 ... 4 슬픔 (Sadness) 아니요 5 1 평온 (Calmness) NaN NaN 2025. 3. 7. 오후 3:27:27 7e237f92deb748
4 8 ✓ 여자 2003. 9. 19. 1050337288 액션 | 드라마 | 판타지 | 코미디 아니오 다소 동의 다소 동의 매우 동의 ... 7 우울 (Depression) 네 8 6 평온 (Calmness) NaN NaN 2025. 3. 7. 오후 3:42:23 de6d9c4a0170b

5 rows × 71 columns

In [16]:
plt.rc('font', family='Malgun Gothic')
pd.set_option('mode.chained_assignment', None)
In [17]:
df = df.iloc[:, 2:-4]

df.head()
Out[17]:
성별 생년월일 전화번호 선호하는 영화 장르 현재 정신 건강 관련 치료나 상담을 받고 있다. 내게 좋은 일이 일어난다면, 나는 보통 그 일로 인해 크게 영향을 받곤 한다. 내가 무언가를 잘해 냈을 때, 나는 그 상태를 계속 유지하고 싶어 한다. 나는 경기에서 이기면 보통 매우 흥분한다. 내가 원하는 어떤 것을 얻게 되면, 나는 흔히 흥분하고 기운이 넘친다. 내가 좋아하는 어떤 것을 볼 기회를 갖게 되면 나는 곧바로 흥분한다. ... 이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다. (1-차분하다, 9-흥분되었다).7 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..7 이 영화를 이전에 본 적이 있습니까?.8 이 영화를 보고난 후 현재 기분이 즐겁고 행복하다. (1-불쾌하다, 9-즐겁다).8 이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다. (1-차분하다, 9-흥분되었다).8 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..8 이 영화를 이전에 본 적이 있습니까?.9 이 영화를 보고난 후 현재 기분이 즐겁고 행복하다. (1-불쾌하다, 9-즐겁다).9 이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다. (1-차분하다, 9-흥분되었다).9 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..9
0 남자 1998. 1. 17. 1049468170 액션 | 코미디 아니오 다소 동의 매우 동의 매우 동의 매우 동의 매우 동의 ... 4 즐거움 (Pleasure) 아니요 6 3 만족 (Contentment) 아니요 7 5 즐거움 (Pleasure)
1 남자 2001. 12. 23 1051350406 코미디 | 드라마 아니오 다소 동의 다소 동의 매우 동의 매우 동의 다소 동의 ... 3 행복 (Happiness) 아니요 2 4 우울 (Depression) 아니요 7 6 행복 (Happiness)
2 남자 1224. 8. 1. 1039471109 액션 | 코미디 | 판타지 아니오 매우 동의 다소 동의 다소 동의 다소 동의 다소 동의 ... 3 행복 (Happiness) 아니요 3 4 슬픔 (Sadness) 아니요 5 3 평온 (Calmness)
3 여자 2004. 1. 25. 1036149407 액션 아니오 다소 동의 다소 동의 다소 동의하지 않음 다소 동의하지 않음 다소 동의하지 않음 ... 1 평온 (Calmness) 아니요 5 4 슬픔 (Sadness) 아니요 5 1 평온 (Calmness)
4 여자 2003. 9. 19. 1050337288 액션 | 드라마 | 판타지 | 코미디 아니오 다소 동의 다소 동의 매우 동의 다소 동의 다소 동의하지 않음 ... 2 평온 (Calmness) 아니요 3 7 우울 (Depression) 네 8 6 평온 (Calmness)

5 rows × 65 columns

In [18]:
import pandas as pd

def rename_columns(df):
    new_columns = {}
    movie_count = 0
    for col in df.columns:
        if "이 영화를 이전에 본 적이 있습니까" in col:
            new_columns[col] = f"Watched_Before_{movie_count}"
            movie_count += 1
        elif "이 영화를 보고난 후 현재 기분이 즐겁고 행복하다." in col:
            new_columns[col] = f"Valence_After_Movie_{movie_count - 1}"
        elif "이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다." in col:
            new_columns[col] = f"Arousal_After_Movie_{movie_count - 1}"
        elif "다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요." in col:
            new_columns[col] = f"Emotion_Keyword_{movie_count - 1}"
        else:
            new_columns[col] = col
    return df.rename(columns=new_columns)

df = rename_columns(df)

df.head()
Out[18]:
성별 생년월일 전화번호 선호하는 영화 장르 현재 정신 건강 관련 치료나 상담을 받고 있다. 내게 좋은 일이 일어난다면, 나는 보통 그 일로 인해 크게 영향을 받곤 한다. 내가 무언가를 잘해 냈을 때, 나는 그 상태를 계속 유지하고 싶어 한다. 나는 경기에서 이기면 보통 매우 흥분한다. 내가 원하는 어떤 것을 얻게 되면, 나는 흔히 흥분하고 기운이 넘친다. 내가 좋아하는 어떤 것을 볼 기회를 갖게 되면 나는 곧바로 흥분한다. ... Arousal_After_Movie_7 Emotion_Keyword_7 Watched_Before_8 Valence_After_Movie_8 Arousal_After_Movie_8 Emotion_Keyword_8 Watched_Before_9 Valence_After_Movie_9 Arousal_After_Movie_9 Emotion_Keyword_9
0 남자 1998. 1. 17. 1049468170 액션 | 코미디 아니오 다소 동의 매우 동의 매우 동의 매우 동의 매우 동의 ... 4 즐거움 (Pleasure) 아니요 6 3 만족 (Contentment) 아니요 7 5 즐거움 (Pleasure)
1 남자 2001. 12. 23 1051350406 코미디 | 드라마 아니오 다소 동의 다소 동의 매우 동의 매우 동의 다소 동의 ... 3 행복 (Happiness) 아니요 2 4 우울 (Depression) 아니요 7 6 행복 (Happiness)
2 남자 1224. 8. 1. 1039471109 액션 | 코미디 | 판타지 아니오 매우 동의 다소 동의 다소 동의 다소 동의 다소 동의 ... 3 행복 (Happiness) 아니요 3 4 슬픔 (Sadness) 아니요 5 3 평온 (Calmness)
3 여자 2004. 1. 25. 1036149407 액션 아니오 다소 동의 다소 동의 다소 동의하지 않음 다소 동의하지 않음 다소 동의하지 않음 ... 1 평온 (Calmness) 아니요 5 4 슬픔 (Sadness) 아니요 5 1 평온 (Calmness)
4 여자 2003. 9. 19. 1050337288 액션 | 드라마 | 판타지 | 코미디 아니오 다소 동의 다소 동의 매우 동의 다소 동의 다소 동의하지 않음 ... 2 평온 (Calmness) 아니요 3 7 우울 (Depression) 네 8 6 평온 (Calmness)

5 rows × 65 columns

In [19]:
# Create a list of movies (0 to 9)
movies = range(10)

# Create a figure to hold multiple subplots
fig, axes = plt.subplots(10, 3, figsize=(18, 60))

# Define the emotion categories order (for consistent labeling if needed)
emotion_order = ['기쁨 (Delight)', '슬픔 (Sadness)', '행복 (Happiness)', '놀람 (Astonishment)', '우울 (Depression)', '피로 (Tiredness)', '짜증 (Annoyance)', '평온 (Calmness)', '두려움 (Fear)', '만족 (Contentment)', '즐거움 (Pleasure)', '좌절 (Frustration)', '기타 (None of These)']

# Loop through each movie
for i, movie in enumerate(movies):
    valence_col = f'Valence_After_Movie_{movie}'
    arousal_col = f'Arousal_After_Movie_{movie}'
    emotion_col = f'Emotion_Keyword_{movie}'

    # Plot for Valence
    sns.histplot(df[valence_col], kde=True, ax=axes[i, 0], color='blue')
    axes[i, 0].set_title(f'Valence_After_Movie_{movie} Distribution')
    axes[i, 0].set_xlabel(f'Valence_After_Movie_{movie}')
    axes[i, 0].set_ylabel('Frequency')
    axes[i, 0].set_xlim(df[valence_col].min(), df[valence_col].max())  # Fix x-axis range
    axes[i, 0].set_ylim(0, df[valence_col].value_counts().max())  # Fix y-axis range

    # Plot for Arousal
    sns.histplot(df[arousal_col], kde=True, ax=axes[i, 1], color='red')
    axes[i, 1].set_title(f'Arousal_After_Movie_{movie} Distribution')
    axes[i, 1].set_xlabel(f'Arousal_After_Movie_{movie}')
    axes[i, 1].set_ylabel('Frequency')
    axes[i, 1].set_xlim(df[arousal_col].min(), df[arousal_col].max())  # Fix x-axis range
    axes[i, 1].set_ylim(0, df[arousal_col].value_counts().max())  # Fix y-axis range

    # Plot for Emotion
    emotion_counts = df[emotion_col].value_counts().reindex(emotion_order, fill_value=0)  # Ensure order
    emotion_counts.plot(kind='bar', stacked=True, color=sns.color_palette("Set2", len(emotion_counts)), ax=axes[i, 2])
    axes[i, 2].set_title(f'Emotion_Keyword_{movie} Distribution')
    axes[i, 2].set_xlabel('Emotion Category')
    axes[i, 2].set_ylabel('Frequency')
    axes[i, 2].set_ylim(0, emotion_counts.max())  # Fix y-axis range
    axes[i, 2].set_xticklabels(emotion_counts.index, rotation=45, ha="right")  # Fix x-axis labels for emotion categories

# Adjust layout to avoid overlap
plt.tight_layout()
plt.show()
No description has been provided for this image
In [20]:
# Prepare to store results for table
results = []

# Loop through each movie to calculate the statistics
for i, movie in enumerate(range(10)):
    valence_col = f'Valence_After_Movie_{movie}'
    arousal_col = f'Arousal_After_Movie_{movie}'
    emotion_col = f'Emotion_Keyword_{movie}'

    valence_stats = df[valence_col].describe()
    arousal_stats = df[arousal_col].describe()

    # Get the emotion counts for the current movie
    emotion_counts = df[emotion_col].value_counts()
    top_emotions = emotion_counts.head(3)

    # Add the statistics and top emotions to results
    results.append({
        'Movie': movie,
        'Valence Mean': valence_stats['mean'],
        'Valence Var': valence_stats['std']**2,
        'Valence SD': valence_stats['std'],
        'Arousal Mean': arousal_stats['mean'],
        'Arousal Var': arousal_stats['std']**2,
        'Arousal SD': arousal_stats['std'],
        'Top 1 Emotion': top_emotions.index[0] if len(top_emotions) > 0 else "N/A",
        'Top 2 Emotion': top_emotions.index[1] if len(top_emotions) > 1 else None,
        'Top 3 Emotion': top_emotions.index[2] if len(top_emotions) > 2 else None,
    })

# Convert results to DataFrame for display
results_df = pd.DataFrame(results)

# Plot the Valence-Arousal Space
plt.figure(figsize=(10, 6))

# Iterate over movies and plot points
for i, row in results_df.iterrows():
    valence = row['Valence Mean']
    arousal = row['Arousal Mean']
    emotion = row['Top 1 Emotion']

    # Plot each movie's point
    plt.scatter(valence, arousal, color='blue', label=f'Movie {i+1}' if i == 0 else "")
    
    # Add text label with emotion
    plt.text(valence + 0.1, arousal + 0.1, f'{i+1}: {emotion}', fontsize=10, color='red')

# Add labels and title
plt.title('Valence-Arousal Space with Top Emotion Labels')
plt.xlabel('Valence (Mean)')
plt.ylabel('Arousal (Mean)')

# Set limits for valence and arousal
plt.xlim(0, 10) 
plt.ylim(0, 10)

# Remove grid
plt.grid(False)

# Draw quadrant axes
plt.axhline(y=5, color='black', linewidth=1.5)  # Horizontal mid-line
plt.axvline(x=5, color='black', linewidth=1.5)  # Vertical mid-line

# Show plot
plt.show()
No description has been provided for this image
In [ ]:
# Calculate IQR for Valence and Arousal (to remove outliers for the entire dataset)
valence_columns = [f'Valence_After_Movie_{i}' for i in range(10)]  # List of Valence columns for all movies
arousal_columns = [f'Arousal_After_Movie_{i}' for i in range(10)]  # List of Arousal columns for all movies

# Initialize filtered dataframe as df (initially no filtering)
df_filtered = df.copy()

# Loop through each Valence column to calculate IQR and filter outliers
for valence_col in valence_columns:
    Q1 = df_filtered[valence_col].quantile(0.25)
    Q3 = df_filtered[valence_col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    # Apply the outlier filter to the entire dataset for Valence
    df_filtered = df_filtered[(df_filtered[valence_col] >= lower_bound) & (df_filtered[valence_col] <= upper_bound)]
    
    # Print the number of rows after filtering for Valence
    print(f"After filtering {valence_col}, number of rows: {df_filtered.shape[0]}")

# Loop through each Arousal column to calculate IQR and filter outliers
for arousal_col in arousal_columns:
    Q1 = df_filtered[arousal_col].quantile(0.25)
    Q3 = df_filtered[arousal_col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    # Apply the outlier filter to the entire dataset for Arousal
    df_filtered = df_filtered[(df_filtered[arousal_col] >= lower_bound) & (df_filtered[arousal_col] <= upper_bound)]
    
    # Print the number of rows after filtering for Arousal
    print(f"After filtering {arousal_col}, number of rows: {df_filtered.shape[0]}")
After filtering Valence_After_Movie_0, number of rows: 32
After filtering Valence_After_Movie_1, number of rows: 31
After filtering Valence_After_Movie_2, number of rows: 26
After filtering Valence_After_Movie_3, number of rows: 26
After filtering Valence_After_Movie_4, number of rows: 25
After filtering Valence_After_Movie_5, number of rows: 25
After filtering Valence_After_Movie_6, number of rows: 25
After filtering Valence_After_Movie_7, number of rows: 21
After filtering Valence_After_Movie_8, number of rows: 21
After filtering Valence_After_Movie_9, number of rows: 21
After filtering Arousal_After_Movie_0, number of rows: 21
After filtering Arousal_After_Movie_1, number of rows: 21
After filtering Arousal_After_Movie_2, number of rows: 21
After filtering Arousal_After_Movie_3, number of rows: 21
After filtering Arousal_After_Movie_4, number of rows: 16
After filtering Arousal_After_Movie_5, number of rows: 15
After filtering Arousal_After_Movie_6, number of rows: 15
After filtering Arousal_After_Movie_7, number of rows: 11
After filtering Arousal_After_Movie_8, number of rows: 11
After filtering Arousal_After_Movie_9, number of rows: 11
In [ ]:
# Prepare to store results for table
results = []

# Loop through each movie to calculate the statistics
for i, movie in enumerate(range(10)):
    valence_col = f'Valence_After_Movie_{movie}'
    arousal_col = f'Arousal_After_Movie_{movie}'
    emotion_col = f'Emotion_Keyword_{movie}'

    valence_stats = df_filtered[valence_col].describe()
    arousal_stats = df_filtered[arousal_col].describe()

    # Get the emotion counts for the current movie
    emotion_counts = df_filtered[emotion_col].value_counts()
    top_emotions = emotion_counts.head(3)

    # Add the statistics and top emotions to results
    results.append({
        'Movie': movie,
        'Valence Mean': valence_stats['mean'],
        'Valence Var': valence_stats['std']**2,
        'Valence SD': valence_stats['std'],
        'Arousal Mean': arousal_stats['mean'],
        'Arousal Var': arousal_stats['std']**2,
        'Arousal SD': arousal_stats['std'],
        'Top 1 Emotion': top_emotions.index[0],
        'Top 2 Emotion': top_emotions.index[1] if len(top_emotions) > 1 else None,
        'Top 3 Emotion': top_emotions.index[2] if len(top_emotions) > 2 else None,
    })

# Convert results to DataFrame for display
results_df = pd.DataFrame(results)

# Display the results as a table in the notebook
print(results_df)
   Movie  Valence Mean  Valence Var  Valence SD  Arousal Mean  Arousal Var  \
0      0      5.545455     0.472727    0.687552      2.545455     3.072727   
1      1      5.636364     0.654545    0.809040      3.727273     4.018182   
2      2      7.272727     0.818182    0.904534      5.636364     2.854545   
3      3      6.272727     0.818182    0.904534      3.545455     2.272727   
4      4      2.272727     1.218182    1.103713      6.909091     0.490909   
5      5      3.272727     1.018182    1.009050      5.727273     0.618182   
6      6      2.909091     1.690909    1.300350      3.363636     3.054545   
7      7      6.818182     0.763636    0.873863      3.545455     0.672727   
8      8      5.000000     2.400000    1.549193      3.181818     2.363636   
9      9      6.909091     0.290909    0.539360      5.090909     1.690909   

   Arousal SD    Top 1 Emotion               Top 2 Emotion  \
0    1.752920    평온 (Calmness)              행복 (Happiness)   
1    2.004540    평온 (Calmness)  해당하는 감정이 없음(None of These)   
2    1.689540   즐거움 (Pleasure)                기쁨 (Delight)   
3    1.507557    평온 (Calmness)            만족 (Contentment)   
4    0.700649       두려움 (Fear)              짜증 (Annoyance)   
5    0.786245   짜증 (Annoyance)               평온 (Calmness)   
6    1.747726  우울 (Depression)  해당하는 감정이 없음(None of These)   
7    0.820200   행복 (Happiness)            만족 (Contentment)   
8    1.537412     슬픔 (Sadness)             우울 (Depression)   
9    1.300350   행복 (Happiness)              즐거움 (Pleasure)   

                Top 3 Emotion  
0  해당하는 감정이 없음(None of These)  
1              즐거움 (Pleasure)  
2            만족 (Contentment)  
3              행복 (Happiness)  
4           놀람 (Astonishment)  
5             우울 (Depression)  
6                슬픔 (Sadness)  
7              즐거움 (Pleasure)  
8              행복 (Happiness)  
9                기쁨 (Delight)  
In [ ]:
 
In [22]:
# Selecting Valence, Arousal, and Emotion columns
valence_columns = [col for col in df.columns if 'Valence_After_Movie_' in col]  # Valence columns
arousal_columns = [col for col in df.columns if 'Arousal_After_Movie_' in col]  # Arousal columns
emotion_columns = [col for col in df.columns if 'Emotion_Keyword_' in col]  # Emotion columns

# Select necessary columns for the analysis
df_selected = df[['전화번호'] + valence_columns + arousal_columns + emotion_columns]

# Add valence, arousal, and emotion for each movie
for i in range(10):
    df_selected[f'valence_{i+1}'] = df_selected[valence_columns[i]].copy()
    df_selected[f'arousal_{i+1}'] = df_selected[arousal_columns[i]].copy()
    df_selected[f'emotion_{i+1}'] = df_selected[emotion_columns[i]].copy()

# Get unique participants
people = df_selected['전화번호'].unique()

# Plot for each person
for person in people:
    try:
        # Extract data for the person
        person_data = df_selected[df_selected['전화번호'] == person]
        
        plt.figure(figsize=(10, 6))

        # Plot each movie's valence and arousal
        for i in range(10):
            valence = person_data[f'valence_{i+1}'].values[0]
            arousal = person_data[f'arousal_{i+1}'].values[0]
            emotion = person_data[f'emotion_{i+1}'].values[0]  # 감정 키워드

            # Plot each movie's point with labels
            plt.scatter(valence, arousal, label=f'Movie {i+1}', color='blue')
            plt.text(valence + 0.2, arousal + 0.2, f'{i+1}\n{emotion}', fontsize=10, color='red')

        # Add labels and title
        plt.title(f'{person}의 10개 영화에 대한 감정 분석')
        plt.xlabel('Valence')
        plt.ylabel('Arousal')

        # Set limits and remove grid
        plt.xlim(0, 10) 
        plt.ylim(0, 10)
        plt.grid(False)

        # Draw quadrant axes
        plt.axhline(y=5, color='black', linewidth=1.5)  # Horizontal mid-line
        plt.axvline(x=5, color='black', linewidth=1.5)  # Vertical mid-line

        plt.show() 
    
    except Exception as e:
        print(f"Error while processing {person}: {e}")
        continue
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [23]:
# Now loop through each participant to calculate the difference from the mean and rank them
final_results = []

# Loop through each participant to calculate the differences
for person in df['전화번호'].unique():
    person_data = df[df['전화번호'] == person]
    
    person_results = {'전화번호': person}  # Start with phone number

    # Loop through each movie for the person
    for i, movie in enumerate(range(10)):
        valence_col = f'Valence_After_Movie_{movie}'
        arousal_col = f'Arousal_After_Movie_{movie}'
        emotion_col = f'Emotion_Keyword_{movie}'

        # Get the mean and SD for Valence and Arousal for the current movie
        valence_mean = results_df.loc[results_df['Movie'] == movie, 'Valence Mean'].values[0]
        arousal_mean = results_df.loc[results_df['Movie'] == movie, 'Arousal Mean'].values[0]

        # Get the participant's Valence, Arousal, and Emotion for the movie
        valence = person_data[valence_col].values[0]
        arousal = person_data[arousal_col].values[0]
        emotion = person_data[emotion_col].values[0]

        # Calculate the differences from the mean
        valence_diff = abs(valence - valence_mean)
        arousal_diff = abs(arousal - arousal_mean)

        # Store the differences and the emotion in the result for this person
        person_results[f'Valence_{movie+1}_Diff'] = valence_diff
        person_results[f'Arousal_{movie+1}_Diff'] = arousal_diff
        person_results[f'Emotion_{movie+1}'] = emotion

    final_results.append(person_results)

# Convert the final result list to a DataFrame
final_df = pd.DataFrame(final_results)

# Sort the results based on the smallest difference in Valence and Arousal
# Corrected sorting code, flattening the difference columns
valence_diff_columns = [f'Valence_{i+1}_Diff' for i in range(10)]
arousal_diff_columns = [f'Arousal_{i+1}_Diff' for i in range(10)]

# Sort the DataFrame based on the sum of differences across all movies
final_df_sorted = final_df.loc[:, ['전화번호'] + valence_diff_columns + arousal_diff_columns + [f'Emotion_{i+1}' for i in range(10)]]
final_df_sorted['Total_Diff'] = final_df_sorted[valence_diff_columns + arousal_diff_columns].sum(axis=1)

# Sort by the 'Total_Diff'
final_df_sorted = final_df_sorted.sort_values(by='Total_Diff')

# Display the sorted results
print(final_df_sorted)

final_df_sorted.to_excel('final_df_sorted.xlsx', index=False)
          전화번호  Valence_1_Diff  Valence_2_Diff  Valence_3_Diff  \
5   1022307441        0.257143        0.228571        1.342857   
19  1041200913        0.742857        0.771429        3.657143   
9   1027642730        0.742857        0.771429        0.342857   
2   1039471109        0.742857        0.771429        0.342857   
12  1064186860        0.742857        0.771429        1.657143   
20  1032776526        0.742857        0.771429        2.657143   
14  1072325997        1.257143        1.228571        0.342857   
21  1029982403        0.257143        0.228571        1.342857   
6   1067510885        0.742857        0.771429        0.342857   
11  1045140817        0.257143        0.228571        2.342857   
25  1022621626        0.257143        1.228571        1.342857   
15  1023732291        0.742857        0.771429        0.342857   
31  1088847847        1.257143        0.228571        0.342857   
1   1051350406        0.257143        1.228571        1.342857   
29   104235208        0.257143        0.228571        1.342857   
28  1034385438        0.742857        0.771429        0.657143   
27  1096603229        0.742857        0.771429        0.342857   
0   1049468170        0.742857        0.771429        0.342857   
26  1099196625        0.257143        0.771429        0.342857   
22  1031713211        0.742857        0.771429        0.342857   
32  1024735837        0.742857        0.771429        1.342857   
33  1031142949        0.257143        0.771429        5.657143   
23  1057878321        2.257143        2.228571        0.342857   
17  1055583949        1.742857        2.228571        0.657143   
18  1029260113        2.257143        2.228571        2.342857   
30  1044575769        0.742857        0.771429        1.342857   
8   1089970961        3.257143        3.228571        1.657143   
13  1040131829        0.742857        0.771429        0.342857   
34  1026873582        0.257143        1.228571        3.657143   
24  1071947573        0.257143        0.771429        2.342857   
16  1093557084        0.257143        0.228571        2.342857   
3   1036149407        0.742857        0.771429        1.657143   
4   1050337288        1.257143        0.771429        1.342857   
7   1083414563        0.742857        0.771429        4.657143   
10  1051002851        0.742857        0.771429        2.342857   

    Valence_4_Diff  Valence_5_Diff  Valence_6_Diff  Valence_7_Diff  \
5         0.057143        1.571429        1.171429        0.371429   
19        0.057143        0.428571        1.171429        0.371429   
9         0.057143        1.428571        0.171429        0.371429   
2         0.942857        0.571429        0.171429        0.628571   
12        0.942857        1.428571        0.828571        0.371429   
20        0.057143        0.428571        0.171429        1.371429   
14        0.942857        0.571429        0.171429        0.371429   
21        0.057143        1.428571        0.171429        2.371429   
6         1.057143        0.428571        2.171429        0.371429   
11        0.057143        1.428571        0.828571        0.371429   
25        0.057143        0.428571        0.828571        0.628571   
15        1.057143        0.571429        0.171429        1.628571   
31        1.057143        0.571429        0.828571        2.371429   
1         2.057143        0.428571        0.171429        0.371429   
29        0.057143        1.571429        0.171429        2.371429   
28        0.942857        0.571429        0.171429        1.371429   
27        1.057143        1.428571        1.828571        0.371429   
0         1.057143        0.428571        2.171429        0.628571   
26        0.057143        4.571429        0.171429        0.371429   
22        0.942857        0.571429        1.171429        0.371429   
32        0.942857        1.571429        1.171429        2.371429   
33        1.057143        0.571429        0.828571        0.628571   
23        1.057143        1.428571        0.828571        1.628571   
17        0.942857        1.428571        0.828571        1.628571   
18        1.057143        0.428571        0.828571        0.371429   
30        0.057143        0.428571        0.828571        1.628571   
8         1.057143        1.428571        1.171429        1.371429   
13        0.942857        0.428571        1.828571        1.628571   
34        0.942857        3.571429        0.171429        0.628571   
24        1.057143        0.571429        1.828571        1.628571   
16        0.057143        1.428571        1.828571        1.628571   
3         1.942857        3.571429        3.171429        1.371429   
4         0.942857        1.428571        0.828571        1.628571   
7         0.942857        1.428571        1.828571        1.628571   
10        0.942857        1.428571        2.171429        1.628571   

    Valence_8_Diff  Valence_9_Diff  ...                   Emotion_2  \
5         0.028571        0.142857  ...  해당하는 감정이 없음(None of These)   
19        0.028571        0.142857  ...               평온 (Calmness)   
9         0.028571        0.142857  ...               평온 (Calmness)   
2         0.028571        1.857143  ...               평온 (Calmness)   
12        0.028571        0.857143  ...  해당하는 감정이 없음(None of These)   
20        0.028571        0.142857  ...               평온 (Calmness)   
14        1.028571        1.142857  ...             졸림 (Sleepiness)   
21        0.028571        1.142857  ...               평온 (Calmness)   
6         0.028571        0.857143  ...                슬픔 (Sadness)   
11        0.028571        0.142857  ...               평온 (Calmness)   
25        0.028571        2.142857  ...               평온 (Calmness)   
15        1.028571        0.142857  ...               평온 (Calmness)   
31        0.028571        0.142857  ...               평온 (Calmness)   
1         0.971429        2.857143  ...              즐거움 (Pleasure)   
29        0.028571        3.142857  ...               평온 (Calmness)   
28        0.028571        3.142857  ...               평온 (Calmness)   
27        1.028571        2.857143  ...               평온 (Calmness)   
0         0.971429        1.142857  ...               평온 (Calmness)   
26        0.028571        2.857143  ...             졸림 (Sleepiness)   
22        2.971429        0.142857  ...               평온 (Calmness)   
32        0.028571        0.142857  ...               평온 (Calmness)   
33        0.028571        3.142857  ...  해당하는 감정이 없음(None of These)   
23        1.028571        1.142857  ...                기쁨 (Delight)   
17        2.028571        1.857143  ...               평온 (Calmness)   
18        2.028571        0.142857  ...               평온 (Calmness)   
30        1.971429        0.857143  ...  해당하는 감정이 없음(None of These)   
8         1.028571        1.857143  ...               평온 (Calmness)   
13        1.971429        1.142857  ...             졸림 (Sleepiness)   
34        2.971429        0.142857  ...                기쁨 (Delight)   
24        2.028571        3.857143  ...               평온 (Calmness)   
16        2.028571        0.857143  ...               평온 (Calmness)   
3         0.971429        0.142857  ...               평온 (Calmness)   
4         0.971429        1.857143  ...               평온 (Calmness)   
7         1.971429        0.142857  ...  해당하는 감정이 없음(None of These)   
10        2.028571        4.142857  ...               평온 (Calmness)   

                     Emotion_3                   Emotion_4          Emotion_5  \
5                 기쁨 (Delight)               평온 (Calmness)         두려움 (Fear)   
19              피로 (Tiredness)            만족 (Contentment)     짜증 (Annoyance)   
9   해당하는 감정이 없음(None of These)               평온 (Calmness)  놀람 (Astonishment)   
2               즐거움 (Pleasure)               평온 (Calmness)    우울 (Depression)   
12              즐거움 (Pleasure)  해당하는 감정이 없음(None of These)         두려움 (Fear)   
20  해당하는 감정이 없음(None of These)            만족 (Contentment)         두려움 (Fear)   
14              즐거움 (Pleasure)              즐거움 (Pleasure)     짜증 (Annoyance)   
21              즐거움 (Pleasure)               평온 (Calmness)         두려움 (Fear)   
6               즐거움 (Pleasure)              행복 (Happiness)     짜증 (Annoyance)   
11              즐거움 (Pleasure)            만족 (Contentment)         두려움 (Fear)   
25              즐거움 (Pleasure)              즐거움 (Pleasure)  놀람 (Astonishment)   
15              행복 (Happiness)            만족 (Contentment)    졸림 (Sleepiness)   
31              즐거움 (Pleasure)               평온 (Calmness)     짜증 (Annoyance)   
1                 기쁨 (Delight)               평온 (Calmness)     짜증 (Annoyance)   
29              즐거움 (Pleasure)               평온 (Calmness)         두려움 (Fear)   
28                기쁨 (Delight)             졸림 (Sleepiness)         두려움 (Fear)   
27            만족 (Contentment)               평온 (Calmness)         두려움 (Fear)   
0             만족 (Contentment)            만족 (Contentment)  놀람 (Astonishment)   
26              즐거움 (Pleasure)              행복 (Happiness)  놀람 (Astonishment)   
22              즐거움 (Pleasure)               평온 (Calmness)         두려움 (Fear)   
32              즐거움 (Pleasure)  해당하는 감정이 없음(None of These)  놀람 (Astonishment)   
33                  두려움 (Fear)               평온 (Calmness)         두려움 (Fear)   
23              즐거움 (Pleasure)               평온 (Calmness)     짜증 (Annoyance)   
17               평온 (Calmness)               평온 (Calmness)     짜증 (Annoyance)   
18                기쁨 (Delight)            만족 (Contentment)         두려움 (Fear)   
30              즐거움 (Pleasure)               평온 (Calmness)         두려움 (Fear)   
8                 기쁨 (Delight)            만족 (Contentment)         두려움 (Fear)   
13                기쁨 (Delight)               평온 (Calmness)     피로 (Tiredness)   
34              짜증 (Annoyance)             졸림 (Sleepiness)     피로 (Tiredness)   
24              즐거움 (Pleasure)            만족 (Contentment)         두려움 (Fear)   
16              즐거움 (Pleasure)              행복 (Happiness)         두려움 (Fear)   
3   해당하는 감정이 없음(None of These)               평온 (Calmness)   만족 (Contentment)   
4               즐거움 (Pleasure)  해당하는 감정이 없음(None of These)         두려움 (Fear)   
7   해당하는 감정이 없음(None of These)              짜증 (Annoyance)     짜증 (Annoyance)   
10              즐거움 (Pleasure)               평온 (Calmness)         두려움 (Fear)   

                     Emotion_6                   Emotion_7  \
5               짜증 (Annoyance)             우울 (Depression)   
19              짜증 (Annoyance)              피로 (Tiredness)   
9              우울 (Depression)  해당하는 감정이 없음(None of These)   
2               짜증 (Annoyance)              짜증 (Annoyance)   
12              짜증 (Annoyance)             우울 (Depression)   
20  해당하는 감정이 없음(None of These)             우울 (Depression)   
14                슬픔 (Sadness)                슬픔 (Sadness)   
21              짜증 (Annoyance)             우울 (Depression)   
6               짜증 (Annoyance)             우울 (Depression)   
11              짜증 (Annoyance)             우울 (Depression)   
25              짜증 (Annoyance)  해당하는 감정이 없음(None of These)   
15              짜증 (Annoyance)            좌절 (Frustration)   
31              짜증 (Annoyance)  해당하는 감정이 없음(None of These)   
1               짜증 (Annoyance)             우울 (Depression)   
29              짜증 (Annoyance)               평온 (Calmness)   
28              짜증 (Annoyance)             우울 (Depression)   
27              짜증 (Annoyance)             우울 (Depression)   
0                평온 (Calmness)             우울 (Depression)   
26              짜증 (Annoyance)              피로 (Tiredness)   
22              짜증 (Annoyance)             우울 (Depression)   
32              짜증 (Annoyance)  해당하는 감정이 없음(None of These)   
33              짜증 (Annoyance)             우울 (Depression)   
23              피로 (Tiredness)              짜증 (Annoyance)   
17              피로 (Tiredness)             우울 (Depression)   
18                  두려움 (Fear)             우울 (Depression)   
30              짜증 (Annoyance)              짜증 (Annoyance)   
8                 슬픔 (Sadness)             우울 (Depression)   
13              짜증 (Annoyance)             우울 (Depression)   
34              짜증 (Annoyance)  해당하는 감정이 없음(None of These)   
24              짜증 (Annoyance)            좌절 (Frustration)   
16                슬픔 (Sadness)             우울 (Depression)   
3             만족 (Contentment)  해당하는 감정이 없음(None of These)   
4               피로 (Tiredness)  해당하는 감정이 없음(None of These)   
7               짜증 (Annoyance)                슬픔 (Sadness)   
10            좌절 (Frustration)             졸림 (Sleepiness)   

                     Emotion_8                   Emotion_9  \
5               행복 (Happiness)                슬픔 (Sadness)   
19              즐거움 (Pleasure)  해당하는 감정이 없음(None of These)   
9               행복 (Happiness)                슬픔 (Sadness)   
2               행복 (Happiness)                슬픔 (Sadness)   
12                기쁨 (Delight)            좌절 (Frustration)   
20                기쁨 (Delight)                슬픔 (Sadness)   
14               평온 (Calmness)              행복 (Happiness)   
21              행복 (Happiness)                슬픔 (Sadness)   
6             만족 (Contentment)                슬픔 (Sadness)   
11              행복 (Happiness)                슬픔 (Sadness)   
25              행복 (Happiness)               평온 (Calmness)   
15              행복 (Happiness)  해당하는 감정이 없음(None of These)   
31              행복 (Happiness)                슬픔 (Sadness)   
1               행복 (Happiness)             우울 (Depression)   
29            만족 (Contentment)              행복 (Happiness)   
28              행복 (Happiness)              행복 (Happiness)   
27              행복 (Happiness)                슬픔 (Sadness)   
0               즐거움 (Pleasure)            만족 (Contentment)   
26            만족 (Contentment)             우울 (Depression)   
22               평온 (Calmness)               평온 (Calmness)   
32                기쁨 (Delight)                슬픔 (Sadness)   
33              행복 (Happiness)            만족 (Contentment)   
23              행복 (Happiness)               평온 (Calmness)   
17                기쁨 (Delight)                슬픔 (Sadness)   
18              행복 (Happiness)                슬픔 (Sadness)   
30  해당하는 감정이 없음(None of These)             우울 (Depression)   
8               행복 (Happiness)                슬픔 (Sadness)   
13               평온 (Calmness)               평온 (Calmness)   
34               평온 (Calmness)                슬픔 (Sadness)   
24              행복 (Happiness)                슬픔 (Sadness)   
16                기쁨 (Delight)                슬픔 (Sadness)   
3                평온 (Calmness)                슬픔 (Sadness)   
4                평온 (Calmness)             우울 (Depression)   
7   해당하는 감정이 없음(None of These)  해당하는 감정이 없음(None of These)   
10              행복 (Happiness)               평온 (Calmness)   

                    Emotion_10  Total_Diff  
5                 기쁨 (Delight)   15.571429  
19            만족 (Contentment)   15.828571  
9               행복 (Happiness)   16.685714  
2                평온 (Calmness)   17.371429  
12              즐거움 (Pleasure)   17.542857  
20  해당하는 감정이 없음(None of These)   17.685714  
14             졸림 (Sleepiness)   17.771429  
21              행복 (Happiness)   18.485714  
6               행복 (Happiness)   19.714286  
11            만족 (Contentment)   19.714286  
25              즐거움 (Pleasure)   20.028571  
15            만족 (Contentment)   20.200000  
31              즐거움 (Pleasure)   20.228571  
1               행복 (Happiness)   21.171429  
29              행복 (Happiness)   22.228571  
28                기쁨 (Delight)   22.228571  
27               평온 (Calmness)   22.914286  
0               즐거움 (Pleasure)   23.142857  
26              즐거움 (Pleasure)   24.057143  
22              즐거움 (Pleasure)   25.200000  
32              행복 (Happiness)   25.571429  
33              즐거움 (Pleasure)   26.600000  
23                기쁨 (Delight)   26.800000  
17               평온 (Calmness)   27.000000  
18              즐거움 (Pleasure)   27.000000  
30               평온 (Calmness)   28.542857  
8                 기쁨 (Delight)   28.914286  
13  해당하는 감정이 없음(None of These)   30.400000  
34             졸림 (Sleepiness)   33.000000  
24              즐거움 (Pleasure)   33.028571  
16              즐거움 (Pleasure)   33.771429  
3                평온 (Calmness)   35.028571  
4                평온 (Calmness)   38.257143  
7   해당하는 감정이 없음(None of These)   40.600000  
10              즐거움 (Pleasure)   41.314286  

[35 rows x 32 columns]
---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
Cell In[23], line 54
     51 # Display the sorted results
     52 print(final_df_sorted)
---> 54 final_df_sorted.to_excel('final_df_sorted.xlsx', index=False)

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\core\generic.py:2252, in NDFrame.to_excel(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options)
   2239 from pandas.io.formats.excel import ExcelFormatter
   2241 formatter = ExcelFormatter(
   2242     df,
   2243     na_rep=na_rep,
   (...)
   2250     inf_rep=inf_rep,
   2251 )
-> 2252 formatter.write(
   2253     excel_writer,
   2254     sheet_name=sheet_name,
   2255     startrow=startrow,
   2256     startcol=startcol,
   2257     freeze_panes=freeze_panes,
   2258     engine=engine,
   2259     storage_options=storage_options,
   2260 )

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\io\formats\excel.py:934, in ExcelFormatter.write(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)
    930     need_save = False
    931 else:
    932     # error: Cannot instantiate abstract class 'ExcelWriter' with abstract
    933     # attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
--> 934     writer = ExcelWriter(  # type: ignore[abstract]
    935         writer, engine=engine, storage_options=storage_options
    936     )
    937     need_save = True
    939 try:

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\io\excel\_openpyxl.py:60, in OpenpyxlWriter.__init__(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)
     56 from openpyxl.workbook import Workbook
     58 engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
---> 60 super().__init__(
     61     path,
     62     mode=mode,
     63     storage_options=storage_options,
     64     if_sheet_exists=if_sheet_exists,
     65     engine_kwargs=engine_kwargs,
     66 )
     68 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
     69 # the file and later write to it
     70 if "r+" in self._mode:  # Load from existing workbook

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\io\excel\_base.py:1219, in ExcelWriter.__init__(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)
   1215 self._handles = IOHandles(
   1216     cast(IO[bytes], path), compression={"compression": None}
   1217 )
   1218 if not isinstance(path, ExcelWriter):
-> 1219     self._handles = get_handle(
   1220         path, mode, storage_options=storage_options, is_text=False
   1221     )
   1222 self._cur_sheet = None
   1224 if date_format is None:

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\io\common.py:868, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    859         handle = open(
    860             handle,
    861             ioargs.mode,
   (...)
    864             newline="",
    865         )
    866     else:
    867         # Binary mode
--> 868         handle = open(handle, ioargs.mode)
    869     handles.append(handle)
    871 # Convert BytesIO or file objects passed with an encoding

PermissionError: [Errno 13] Permission denied: 'final_df_sorted.xlsx'
In [ ]:
 
In [ ]:
# Selecting Valence, Arousal, and Emotion columns
valence_columns = [col for col in df_filtered.columns if 'Valence_After_Movie_' in col]  # Valence columns
arousal_columns = [col for col in df_filtered.columns if 'Arousal_After_Movie_' in col]  # Arousal columns
emotion_columns = [col for col in df_filtered.columns if 'Emotion_Keyword_' in col]  # Emotion columns

# Select necessary columns for the analysis
df_selected = df_filtered[['전화번호'] + valence_columns + arousal_columns + emotion_columns]

# Loop through each person to create a table for each
for person in df_selected['전화번호'].unique():
    # Filter the data for the current person
    person_data = df_selected[df_selected['전화번호'] == person]
    
    # Initialize a list to store the result for this person
    result_list = []
    
    # Loop through each movie (10 movies)
    for i in range(10):
        valence = person_data[valence_columns[i]].values[0]
        arousal = person_data[arousal_columns[i]].values[0]
        emotion = person_data[emotion_columns[i]].values[0]
        
        # Add the result to the list along with the phone number
        result_list.append({
            'Movie': i+1,
            'Valence': valence,
            'Arousal': arousal,
            'Emotion': emotion,
            '전화번호': person  # Add the phone number here
        })
    
    # Convert the result list to a DataFrame for the current person
    result_df = pd.DataFrame(result_list)

    # Display the DataFrame for the current person
    print(result_df)
   Movie  Valence  Arousal            Emotion        전화번호
0      1        5        5      평온 (Calmness)  1049468170
1      2        5        5      평온 (Calmness)  1049468170
2      3        7        6   만족 (Contentment)  1049468170
3      4        7        7   만족 (Contentment)  1049468170
4      5        2        6  놀람 (Astonishment)  1049468170
5      6        5        5      평온 (Calmness)  1049468170
6      7        2        4    우울 (Depression)  1049468170
7      8        6        4     즐거움 (Pleasure)  1049468170
8      9        6        3   만족 (Contentment)  1049468170
9     10        7        5     즐거움 (Pleasure)  1049468170
   Movie  Valence  Arousal          Emotion        전화번호
0      1        6        3   행복 (Happiness)  1051350406
1      2        7        5   즐거움 (Pleasure)  1051350406
2      3        8        7     기쁨 (Delight)  1051350406
3      4        8        4    평온 (Calmness)  1051350406
4      5        2        7   짜증 (Annoyance)  1051350406
5      6        3        5   짜증 (Annoyance)  1051350406
6      7        3        3  우울 (Depression)  1051350406
7      8        6        3   행복 (Happiness)  1051350406
8      9        2        4  우울 (Depression)  1051350406
9     10        7        6   행복 (Happiness)  1051350406
   Movie  Valence  Arousal                     Emotion        전화번호
0      1        6        2               평온 (Calmness)  1022307441
1      2        6        6  해당하는 감정이 없음(None of These)  1022307441
2      3        8        6                기쁨 (Delight)  1022307441
3      4        6        2               평온 (Calmness)  1022307441
4      5        4        7                  두려움 (Fear)  1022307441
5      6        4        6              짜증 (Annoyance)  1022307441
6      7        3        3             우울 (Depression)  1022307441
7      8        7        3              행복 (Happiness)  1022307441
8      9        5        4                슬픔 (Sadness)  1022307441
9     10        7        6                기쁨 (Delight)  1022307441
   Movie  Valence  Arousal           Emotion        전화번호
0      1        5        1     평온 (Calmness)  1067510885
1      2        5        3      슬픔 (Sadness)  1067510885
2      3        7        6    즐거움 (Pleasure)  1067510885
3      4        7        5    행복 (Happiness)  1067510885
4      5        2        7    짜증 (Annoyance)  1067510885
5      6        5        5    짜증 (Annoyance)  1067510885
6      7        3        3   우울 (Depression)  1067510885
7      8        7        5  만족 (Contentment)  1067510885
8      9        4        5      슬픔 (Sadness)  1067510885
9     10        7        6    행복 (Happiness)  1067510885
   Movie  Valence  Arousal                     Emotion        전화번호
0      1        5        1               평온 (Calmness)  1027642730
1      2        5        1               평온 (Calmness)  1027642730
2      3        7        2  해당하는 감정이 없음(None of These)  1027642730
3      4        6        2               평온 (Calmness)  1027642730
4      5        1        7           놀람 (Astonishment)  1027642730
5      6        3        5             우울 (Depression)  1027642730
6      7        3        4  해당하는 감정이 없음(None of These)  1027642730
7      8        7        3              행복 (Happiness)  1027642730
8      9        5        1                슬픔 (Sadness)  1027642730
9     10        7        3              행복 (Happiness)  1027642730
   Movie  Valence  Arousal                     Emotion        전화번호
0      1        5        3               평온 (Calmness)  1064186860
1      2        5        3  해당하는 감정이 없음(None of These)  1064186860
2      3        5        3              즐거움 (Pleasure)  1064186860
3      4        5        3  해당하는 감정이 없음(None of These)  1064186860
4      5        1        7                  두려움 (Fear)  1064186860
5      6        2        6              짜증 (Annoyance)  1064186860
6      7        3        5             우울 (Depression)  1064186860
7      8        7        3                기쁨 (Delight)  1064186860
8      9        4        3            좌절 (Frustration)  1064186860
9     10        6        3              즐거움 (Pleasure)  1064186860
   Movie  Valence  Arousal          Emotion        전화번호
0      1        7        5    평온 (Calmness)  1072325997
1      2        7        5  졸림 (Sleepiness)  1072325997
2      3        7        5   즐거움 (Pleasure)  1072325997
3      4        5        4   즐거움 (Pleasure)  1072325997
4      5        3        6   짜증 (Annoyance)  1072325997
5      6        3        5     슬픔 (Sadness)  1072325997
6      7        3        1     슬픔 (Sadness)  1072325997
7      8        8        3    평온 (Calmness)  1072325997
8      9        6        3   행복 (Happiness)  1072325997
9     10        7        5  졸림 (Sleepiness)  1072325997
   Movie  Valence  Arousal                     Emotion        전화번호
0      1        5        5               평온 (Calmness)  1023732291
1      2        5        6               평온 (Calmness)  1023732291
2      3        7        6              행복 (Happiness)  1023732291
3      4        7        2            만족 (Contentment)  1023732291
4      5        3        7             졸림 (Sleepiness)  1023732291
5      6        3        6              짜증 (Annoyance)  1023732291
6      7        1        1            좌절 (Frustration)  1023732291
7      8        8        3              행복 (Happiness)  1023732291
8      9        5        1  해당하는 감정이 없음(None of These)  1023732291
9     10        7        5            만족 (Contentment)  1023732291
   Movie  Valence  Arousal          Emotion        전화번호
0      1        6        1    평온 (Calmness)  1029982403
1      2        6        1    평온 (Calmness)  1029982403
2      3        8        7   즐거움 (Pleasure)  1029982403
3      4        6        3    평온 (Calmness)  1029982403
4      5        1        8       두려움 (Fear)  1029982403
5      6        3        6   짜증 (Annoyance)  1029982403
6      7        5        2  우울 (Depression)  1029982403
7      8        7        4   행복 (Happiness)  1029982403
8      9        6        2     슬픔 (Sadness)  1029982403
9     10        7        6   행복 (Happiness)  1029982403
   Movie  Valence  Arousal           Emotion       전화번호
0      1        6        1     평온 (Calmness)  104235208
1      2        6        1     평온 (Calmness)  104235208
2      3        8        7    즐거움 (Pleasure)  104235208
3      4        6        4     평온 (Calmness)  104235208
4      5        4        6        두려움 (Fear)  104235208
5      6        3        7    짜증 (Annoyance)  104235208
6      7        5        4     평온 (Calmness)  104235208
7      8        7        3  만족 (Contentment)  104235208
8      9        8        3    행복 (Happiness)  104235208
9     10        8        7    행복 (Happiness)  104235208
   Movie  Valence  Arousal                     Emotion        전화번호
0      1        5        1  해당하는 감정이 없음(None of These)  1044575769
1      2        5        5  해당하는 감정이 없음(None of These)  1044575769
2      3        8        7              즐거움 (Pleasure)  1044575769
3      4        6        3               평온 (Calmness)  1044575769
4      5        2        8                  두려움 (Fear)  1044575769
5      6        2        7              짜증 (Annoyance)  1044575769
6      7        1        7              짜증 (Annoyance)  1044575769
7      8        5        5  해당하는 감정이 없음(None of These)  1044575769
8      9        4        6             우울 (Depression)  1044575769
9     10        6        4               평온 (Calmness)  1044575769
In [ ]:

In [ ]:
 
In [ ]:
# Selecting Valence, Arousal, and Emotion columns
valence_columns = [col for col in df_filtered.columns if 'Valence_After_Movie_' in col]  # Valence columns
arousal_columns = [col for col in df_filtered.columns if 'Arousal_After_Movie_' in col]  # Arousal columns
emotion_columns = [col for col in df_filtered.columns if 'Emotion_Keyword_' in col]  # Emotion columns

# Select necessary columns for the analysis
df_selected = df_filtered[['전화번호'] + valence_columns + arousal_columns + emotion_columns]

# Add valence, arousal, and emotion for each movie
for i in range(10):
    df_selected[f'valence_{i+1}'] = df_selected[valence_columns[i]].copy()
    df_selected[f'arousal_{i+1}'] = df_selected[arousal_columns[i]].copy()
    df_selected[f'emotion_{i+1}'] = df_selected[emotion_columns[i]].copy()

# Get unique participants
people = df_selected['전화번호'].unique()

# Plot for each person
for person in people:
    try:
        # Extract data for the person
        person_data = df_selected[df_selected['전화번호'] == person]
        
        plt.figure(figsize=(10, 6))

        # Plot each movie's valence and arousal
        for i in range(10):
            valence = person_data[f'valence_{i+1}'].values[0]
            arousal = person_data[f'arousal_{i+1}'].values[0]
            emotion = person_data[f'emotion_{i+1}'].values[0]  # 감정 키워드

            # Plot each movie's point with labels
            plt.scatter(valence, arousal, label=f'Movie {i+1}', color='blue')
            plt.text(valence + 0.2, arousal + 0.2, f'{i+1}\n{emotion}', fontsize=10, color='red')

        # Add labels and title
        plt.title(f'{person}의 10개 영화에 대한 감정 분석')
        plt.xlabel('Valence')
        plt.ylabel('Arousal')

        # Set limits and remove grid
        plt.xlim(0, 10) 
        plt.ylim(0, 10)
        plt.grid(False)

        # Draw quadrant axes
        plt.axhline(y=5, color='black', linewidth=1.5)  # Horizontal mid-line
        plt.axvline(x=5, color='black', linewidth=1.5)  # Vertical mid-line

        plt.show() 
    
    except Exception as e:
        print(f"Error while processing {person}: {e}")
        continue
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_7264\1955633905.py in <module>
      2 arousal_columns = [col for col in df.columns if '이 영화를 보고난 후 흥분(Excited)되거나 긴장되었다' in col]  # Excitement columns
      3 
----> 4 df_selected = df[['Submission ID'] + valence_columns + arousal_columns]
      5 
      6 for i in range(10):

c:\Users\kgty\anaconda3\envs\kocca\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   3462             if is_iterator(key):
   3463                 key = list(key)
-> 3464             indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
   3465 
   3466         # take() does not accept boolean indexers

c:\Users\kgty\anaconda3\envs\kocca\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis)
   1312             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1313 
-> 1314         self._validate_read_indexer(keyarr, indexer, axis)
   1315 
   1316         if needs_i8_conversion(ax.dtype) or isinstance(

c:\Users\kgty\anaconda3\envs\kocca\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis)
   1372                 if use_interval_msg:
   1373                     key = list(key)
-> 1374                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   1375 
   1376             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())

KeyError: "None of [Index(['Submission ID'], dtype='object')] are in the [columns]"
In [ ]:
def classify_quadrant(valence, arousal):
    if valence >= 5 and arousal >= 5:
        return 'Top-right'
    elif valence < 5 and arousal >= 5:
        return 'Top-left'
    elif valence >= 5 and arousal < 5:
        return 'Bottom-right'
    else:
        return 'Bottom-left'

movie_quadrants_count = {f'영화 {i+1}': {'Top-right': 0, 'Top-left': 0, 'Bottom-right': 0, 'Bottom-left': 0} 
                         for i in range(10)}

for _, row in df_selected.iterrows():
    for i in range(10):
        valence = row[valence_columns[i]]
        arousal = row[arousal_columns[i]]
        
        quadrant = classify_quadrant(valence, arousal)
        
        movie_quadrants_count[f'영화 {i+1}'][quadrant] += 1

sorted_movies = sorted(movie_quadrants_count.items(), key=lambda x: sum(x[1].values()), reverse=True)

print("영화별 사분면에 포함된 횟수 순위:")
for movie, quadrants in sorted_movies:
    print(f"{movie}: {quadrants}")
영화별 사분면에 포함된 횟수 순위:
영화 1: {'Top-right': 6, 'Top-left': 0, 'Bottom-right': 19, 'Bottom-left': 1}
영화 2: {'Top-right': 6, 'Top-left': 0, 'Bottom-right': 20, 'Bottom-left': 0}
영화 3: {'Top-right': 17, 'Top-left': 1, 'Bottom-right': 6, 'Bottom-left': 2}
영화 4: {'Top-right': 4, 'Top-left': 0, 'Bottom-right': 21, 'Bottom-left': 1}
영화 5: {'Top-right': 1, 'Top-left': 20, 'Bottom-right': 0, 'Bottom-left': 5}
영화 6: {'Top-right': 4, 'Top-left': 16, 'Bottom-right': 0, 'Bottom-left': 6}
영화 7: {'Top-right': 0, 'Top-left': 2, 'Bottom-right': 1, 'Bottom-left': 23}
영화 8: {'Top-right': 2, 'Top-left': 0, 'Bottom-right': 23, 'Bottom-left': 1}
영화 9: {'Top-right': 3, 'Top-left': 2, 'Bottom-right': 14, 'Bottom-left': 7}
영화 10: {'Top-right': 13, 'Top-left': 0, 'Bottom-right': 13, 'Bottom-left': 0}
In [ ]:
emotion_to_coordinates = {
    '기쁨 (Delight)': (7.454545, 4.909091),
    '행복 (Arousal)': (7.000000, 3.210526),
    '즐거움 (Pleasure)': (7.166667, 5.777778),
    '두려움 (Fear)': (1.800000, 6.500000),
    '짜증 (Annoyance)': (2.529412, 4.764706),
    '좌절 (Frustration)': (3.333333, 4.333333),
    '만족 (Contentment)': (6.750000, 4.666667),
    '평온 (Calmness)': (5.909091, 2.340909),
    '놀람 (Astonishment)': (3.800000, 4.800000),
    '슬픔 (Sadness)': (3.687500, 3.250000),
    '우울 (Depression)': (2.562500, 3.250000),
    '피로 (Tiredness)': (2.750000, 3.000000),
    '해당하는 감정이 없음(None of These)': (4.684211, 2.578947)
}

emotion_columns = [col for col in df.columns if '다음 중 감정을 가장 잘 나타내는 단어' in col]

df_selected = df[['Submission ID'] + emotion_columns] 

for i in range(10):
    df_selected[f'emotion_{i+1}'] = df_selected[emotion_columns[i]].copy()

people = df_selected['Submission ID'].unique()

for person in people:
    try:
        person_data = df_selected[df_selected['Submission ID'] == person]
        
        plt.figure(figsize=(10, 6))

        quadrants_count = {'Top-right': 0, 'Top-left': 0, 'Bottom-right': 0, 'Bottom-left': 0, 'None': 0}

        for i in range(10):
            emotion = person_data[f'emotion_{i+1}'].values[0]
            emotion_coordinate = emotion_to_coordinates.get(emotion, (5, 5))
            emotion_valence, emotion_arousal = emotion_coordinate

            quadrant = 'None'
            if emotion_valence >= 5 and emotion_arousal >= 5:
                quadrant = 'Top-right'
            elif emotion_valence < 5 and emotion_arousal >= 5:
                quadrant = 'Top-left'
            elif emotion_valence >= 5 and emotion_arousal < 5:
                quadrant = 'Bottom-right'
            elif emotion_valence < 5 and emotion_arousal < 5:
                quadrant = 'Bottom-left'
            
            quadrants_count[quadrant] += 1

            plt.scatter(emotion_valence, emotion_arousal, label=f'영화 {i+1} 감정', color='blue', marker='o')

        plt.text(8.5, 8, f'Top-right: {quadrants_count["Top-right"]}', fontsize=12, color='green')
        plt.text(8.5, 7.5, f'Top-left: {quadrants_count["Top-left"]}', fontsize=12, color='green')
        plt.text(8.5, 7, f'Bottom-right: {quadrants_count["Bottom-right"]}', fontsize=12, color='green')
        plt.text(8.5, 6.5, f'Bottom-left: {quadrants_count["Bottom-left"]}', fontsize=12, color='green')
        plt.text(8.5, 6, f'None: {quadrants_count["None"]}', fontsize=12, color='green')

        plt.title(f'{person}의 10개 영화에 대한 감정 좌표 분석')
        plt.xlabel('Valence')
        plt.ylabel('Arousal')
        plt.xlim(0, 10)
        plt.ylim(0, 10) 
        plt.grid(True)
        plt.show() 

    except Exception as e:
        print(f"Error while processing {person}: {e}")
        continue 
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
df_selected.head()
Out[ ]:
Submission ID 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요. 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..1 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..2 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..3 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..4 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..5 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..6 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..7 다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요..8 ... emotion_1 emotion_2 emotion_3 emotion_4 emotion_5 emotion_6 emotion_7 emotion_8 emotion_9 emotion_10
0 fb14df8ad543d1 평온 (Calmness) 평온 (Calmness) 만족 (Contentment) 만족 (Contentment) 놀람 (Astonishment) 평온 (Calmness) 우울 (Depression) 즐거움 (Pleasure) 만족 (Contentment) ... 평온 (Calmness) 평온 (Calmness) 만족 (Contentment) 만족 (Contentment) 놀람 (Astonishment) 평온 (Calmness) 우울 (Depression) 즐거움 (Pleasure) 만족 (Contentment) 즐거움 (Pleasure)
1 2686e6d7d543d1 행복 (Happiness) 즐거움 (Pleasure) 기쁨 (Delight) 평온 (Calmness) 짜증 (Annoyance) 짜증 (Annoyance) 우울 (Depression) 행복 (Happiness) 우울 (Depression) ... 행복 (Happiness) 즐거움 (Pleasure) 기쁨 (Delight) 평온 (Calmness) 짜증 (Annoyance) 짜증 (Annoyance) 우울 (Depression) 행복 (Happiness) 우울 (Depression) 행복 (Happiness)
2 3b6e7dc0d543d1 해당하는 감정이 없음(None of These) 평온 (Calmness) 즐거움 (Pleasure) 평온 (Calmness) 우울 (Depression) 짜증 (Annoyance) 짜증 (Annoyance) 행복 (Happiness) 슬픔 (Sadness) ... 해당하는 감정이 없음(None of These) 평온 (Calmness) 즐거움 (Pleasure) 평온 (Calmness) 우울 (Depression) 짜증 (Annoyance) 짜증 (Annoyance) 행복 (Happiness) 슬픔 (Sadness) 평온 (Calmness)
3 7e237f92deb748 평온 (Calmness) 평온 (Calmness) 해당하는 감정이 없음(None of These) 평온 (Calmness) 만족 (Contentment) 만족 (Contentment) 해당하는 감정이 없음(None of These) 평온 (Calmness) 슬픔 (Sadness) ... 평온 (Calmness) 평온 (Calmness) 해당하는 감정이 없음(None of These) 평온 (Calmness) 만족 (Contentment) 만족 (Contentment) 해당하는 감정이 없음(None of These) 평온 (Calmness) 슬픔 (Sadness) 평온 (Calmness)
4 de6d9c4a0170b 행복 (Happiness) 평온 (Calmness) 즐거움 (Pleasure) 해당하는 감정이 없음(None of These) 두려움 (Fear) 피로 (Tiredness) 해당하는 감정이 없음(None of These) 평온 (Calmness) 우울 (Depression) ... 행복 (Happiness) 평온 (Calmness) 즐거움 (Pleasure) 해당하는 감정이 없음(None of These) 두려움 (Fear) 피로 (Tiredness) 해당하는 감정이 없음(None of These) 평온 (Calmness) 우울 (Depression) 평온 (Calmness)

5 rows × 21 columns

In [8]:
for i in range(10):
    df_selected[f'emotion_{i+1}'] = df_selected[emotion_columns[i]].copy()

emotion_rank_per_movie = {}

for i in range(10):
    emotion_counts = {emotion: 0 for emotion in emotion_to_coordinates.keys()}

    for person in df_selected['Submission ID']:
        emotion = df_selected.loc[df_selected['Submission ID'] == person, f'emotion_{i+1}'].values[0]
        if emotion in emotion_counts:
            emotion_counts[emotion] += 1

    sorted_emotions = sorted(emotion_counts.items(), key=lambda x: x[1], reverse=True)
    emotion_rank_per_movie[f'영화 {i+1}'] = sorted_emotions

for movie, rankings in emotion_rank_per_movie.items():
    print(f"\n{movie} 감정 순위:")
    for emotion, count in rankings:
        print(f"{emotion}: {count}회")
C:\Users\kgty\AppData\Local\Temp\ipykernel_22044\4289841536.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[f'emotion_{i+1}'] = df_selected[emotion_columns[i]].copy()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 7
      4 emotion_rank_per_movie = {}
      6 for i in range(10):
----> 7     emotion_counts = {emotion: 0 for emotion in emotion_to_coordinates.keys()}
      9     for person in df_selected['Submission ID']:
     10         emotion = df_selected.loc[df_selected['Submission ID'] == person, f'emotion_{i+1}'].values[0]

NameError: name 'emotion_to_coordinates' is not defined
In [9]:
df_selected = df[['Submission ID'] + emotion_columns + valence_columns + arousal_columns]

emotion_va_mean = []

for person in df_selected['Submission ID'].unique():
    person_data = df_selected[df_selected['Submission ID'] == person]
    
    for i in range(len(emotion_columns)):
        emotion = person_data[emotion_columns[i]].values[0]
        
        valence = person_data[valence_columns[i]].values[0]
        arousal = person_data[arousal_columns[i]].values[0]
        
        emotion_va_mean.append({
            'Submission ID': person,
            'Emotion': emotion,
            'Valence': valence,
            'Arousal': arousal,
            'Mean VA': (valence + arousal) / 2 
        })

df_va_mean = pd.DataFrame(emotion_va_mean)

emotion_avg_va = df_va_mean.groupby('Emotion')[['Valence', 'Arousal', 'Mean VA']].mean()
emotion_response_count = df_va_mean.groupby('Emotion').size() 

emotion_avg_va['Response Count'] = emotion_response_count

print(emotion_avg_va)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[9], line 1
----> 1 df_selected = df[['Submission ID'] + emotion_columns + valence_columns + arousal_columns]
      3 emotion_va_mean = []
      5 for person in df_selected['Submission ID'].unique():

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\core\frame.py:3767, in DataFrame.__getitem__(self, key)
   3765     if is_iterator(key):
   3766         key = list(key)
-> 3767     indexer = self.columns._get_indexer_strict(key, "columns")[1]
   3769 # take() does not accept boolean indexers
   3770 if getattr(indexer, "dtype", None) == bool:

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:5877, in Index._get_indexer_strict(self, key, axis_name)
   5874 else:
   5875     keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5877 self._raise_if_missing(keyarr, indexer, axis_name)
   5879 keyarr = self.take(indexer)
   5880 if isinstance(key, Index):
   5881     # GH 42790 - Preserve name from an Index

File c:\Users\kgty\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:5941, in Index._raise_if_missing(self, key, indexer, axis_name)
   5938     raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   5940 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 5941 raise KeyError(f"{not_found} not in index")

KeyError: "['Submission ID'] not in index"

여기서 부터 추가된 코드 ---------------------------------------------------¶

In [194]:
# 중복 응답률(Redundancy Rate) 계산 함수
def calculate_diversity(series):
    unique_values = len(set(series))  # 고유한 값 개수
    total_values = len(series)  # 전체 응답 개수
    diversity_ratio = unique_values / total_values  # 다양성 비율
    redundancy_rate = (1 - diversity_ratio) * 100  # 중복 응답률 (%)
    
    return pd.Series([unique_values, diversity_ratio, redundancy_rate], 
                     index=['Unique_Count', 'Diversity_Ratio', 'Redundancy_Rate'])

# 개인별 응답 신뢰도 분석
df['Valence_Diversity'] = df_selected[valence_columns].apply(calculate_diversity, axis=1)['Diversity_Ratio']
df['Arousal_Diversity'] = df_selected[arousal_columns].apply(calculate_diversity, axis=1)['Diversity_Ratio']

# 중복 응답률이 너무 높은 사용자 제거 (예: Diversity Ratio < 0.5인 경우)
df_filtered = df[(df['Valence_Diversity'] >= 0.4) & (df['Arousal_Diversity'] >= 0.4)].reset_index(drop=True)
In [195]:
df_excluded = df[~((df['Valence_Diversity'] >= 0.4) & (df['Arousal_Diversity'] >= 0.4))]
df_excluded[['Submission ID']]
Out[195]:
Submission ID
3 7e237f92deb748
7 5e877a85417bf3
10 7e4950a4417bf3
In [196]:
import matplotlib.pyplot as plt
import numpy as np

# Submission ID의 앞 4자리 추출
df_filtered['Submission_ID_Short'] = df_filtered['Submission ID'].astype(str).str[:4]
df_excluded['Submission_ID_Short'] = df_excluded['Submission ID'].astype(str).str[:4]

# 그래프 설정
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6), sharey=True)

# 막대 너비 설정
bar_width = 0.4
x_filtered = np.arange(len(df_filtered))
x_excluded = np.arange(len(df_excluded))

# df_filtered 막대 그래프 생성
axes[0].bar(x_filtered - bar_width/2, df_filtered['Valence_Diversity'], bar_width, label='Valence Diversity', color='#1f77b4', alpha=0.7)
axes[0].bar(x_filtered + bar_width/2, df_filtered['Arousal_Diversity'], bar_width, label='Arousal Diversity', color='#ff7f0e', alpha=0.7)
axes[0].set_xticks(x_filtered)
axes[0].set_xticklabels(df_filtered['Submission_ID_Short'].values, rotation=45, ha='right')
axes[0].set_ylabel('Diversity Score')
axes[0].set_title('Filtered: Valence & Arousal Diversity')
axes[0].legend()
axes[0].grid(axis='y', linestyle='--', alpha=0.7)

# df_excluded 막대 그래프 생성
axes[1].bar(x_excluded - bar_width/2, df_excluded['Valence_Diversity'], bar_width, label='Valence Diversity', color='#1f77b4', alpha=0.7)
axes[1].bar(x_excluded + bar_width/2, df_excluded['Arousal_Diversity'], bar_width, label='Arousal Diversity', color='#ff7f0e', alpha=0.7)
axes[1].set_xticks(x_excluded)
axes[1].set_xticklabels(df_excluded['Submission_ID_Short'].values, rotation=45, ha='right')
axes[1].set_xlabel('Submission ID (First 4 Digits)')
axes[1].set_title('Excluded: Valence & Arousal Diversity')
axes[1].legend()
axes[1].grid(axis='y', linestyle='--', alpha=0.7)

# 그래프 출력
plt.tight_layout()
plt.show()
No description has been provided for this image
In [197]:
pd.set_option("display.max_rows", None)  # 모든 행 출력

len(df_filtered)
Out[197]:
23
In [201]:
emotion_columns = [col for col in df.columns if '다음 중 감정을 가장 잘 나타내는 단어를 골라 주세요.' in col]  


df_filtered[emotion_columns]

# 중복 응답률 계산 함수
def calculate_text_diversity(series):
    unique_values = len(set(series))  # 고유한 응답 개수
    total_values = len(series)  # 전체 응답 개수
    if total_values == 0:
        print('0 is exist')
    diversity_ratio = unique_values / total_values  # 다양성 비율
    redundancy_rate = (1 - diversity_ratio) * 100  # 중복 응답률 (%)
    return pd.Series([unique_values, diversity_ratio, redundancy_rate], 
                     index=['Unique_Count', 'Diversity_Ratio', 'Redundancy_Rate'])


df_filtered['Emotion_Diversity'] = df_filtered[emotion_columns].apply(calculate_text_diversity, axis=1)['Diversity_Ratio'].reset_index(drop=True)

df_filtered

df_excluded = df_filtered[df_filtered['Emotion_Diversity'] < 0.6].reset_index(drop=True)


df_filtered = df_filtered[df_filtered['Emotion_Diversity'] >= 0.6].reset_index(drop=True)
In [205]:
df_excluded[['Submission ID']]
Out[205]:
Submission ID
0 fb14df8ad543d1
1 7ae6137a6db290
In [ ]: